# Build a de-duplicated list of single-word entries from a gold file.
#
# NOTE(review): judging only by the patterns below, input lines are assumed
# to look like
#   bright.a 1 :: intelligent 3;clever 3;smart 1;
# -- confirm against the real gold.txt.
#
# Pipeline stages:
#   1. " <digits> :: "       -> TAB
#   2. "<spaces><digit>;"    -> TAB
#      NOTE(review): only ONE digit is matched before ';', exactly as in the
#      original expression; a value such as " 10;" is left in place. Confirm
#      that multi-digit values cannot occur.
#   3. every remaining space -> "_", so a multi-word entry stays one field
#   4. awk prints each whitespace-separated field on its own line
#   5. sort -u removes duplicates
#   6. ".x" (dot + lowercase letter) -> "<TAB>x"
#   7. lines containing "_" (the multi-word entries from step 3) are dropped
#
# Arguments:
#   $1 - input file  (default: gold.txt)
#   $2 - output file (default: words.txt)
# Returns:
#   1 if the input is not readable (the output file is then left untouched
#   instead of being truncated); otherwise the status of the final grep,
#   which is non-zero when no line survives the filter.
#
# The body is a subshell "( ... )" so the helper variables never leak into
# the calling shell, without relying on the non-POSIX `local` keyword.
extract_words() (
  input=${1:-gold.txt}
  output=${2:-words.txt}

  if [ ! -r "$input" ]; then
    printf 'extract_words: cannot read %s\n' "$input" >&2
    return 1
  fi

  # A literal tab: "\t" inside a sed expression is a GNU extension, and so
  # is "\+", hence the POSIX interval "\{1,\}" used below.
  tab=$(printf '\t')

  sed -e "s/ [0-9]\{1,\} :: /${tab}/g" \
      -e "s/ \{1,\}[0-9];/${tab}/g" \
      -e 's/ /_/g' < "$input" \
    | awk '{ for (i = 1; i <= NF; i++) print $i }' \
    | sort -u \
    | sed -e "s/\.\([a-z]\)/${tab}\1/g" \
    | grep -v '_' > "$output"
)

# Same behaviour as the original one-liner: gold.txt -> words.txt in the
# current directory; script arguments are deliberately not forwarded.
extract_words

